library(ggplot2)
library(stringr)
library(mgcv)
## Loading required package: nlme
## This is mgcv 1.8-6. For overview type 'help("mgcv-package")'.
set.seed(123)
data_length <- read.csv("~/mysis/data/lengthData.csv", skip = 4, header = TRUE)
net <- data_length$Size..m.
y <- data_length$Length..mm.
gender <- as.factor(substr(str_trim(as.character(data_length$Gender)), 0, 1))
## remove whitespace from string, then truncate into
## broad gender classes, not subclasses
date <- as.numeric(data_length$Date)
date[date == 2] <- 1 ## collapse into monthly values
date[date == 3] <- 2
date[date == 4] <- 3
date <- as.factor(date)
plot(as.numeric(date), type='l', main = "Is this a data error? - yes?")
## correct date mislabeling
date <- as.numeric(data_length$Date)
date[7330:7519] <- 4
date[7559:7773] <- 4
date[date == 2] <- 1 ## collapse into monthly values
date[date == 3] <- 2
date[date == 4] <- 3
date <- as.factor(date)
plot(as.numeric(date), type='l', main = "Data looks better")
data <- data.frame(y=y, date=date, net=net, gender=gender)
X <- model.matrix(~ net + date + gender, data=data)
ggplot(data=data, aes(y)) + geom_histogram() + facet_grid(net ~ date)
idx <- attr(uniquecombs(X), "index")
\(t\) = time \(i\) = net size \(j\) = age class \(k\) = replicate
\[ y_{tijk} \sim \mbox{N}(\mu_{tij}, \sigma^2_{tij}) \]
## Loading required package: Rcpp
## Loading required package: inline
##
## Attaching package: 'inline'
##
## The following object is masked from 'package:Rcpp':
##
## registerPlugin
##
## rstan (Version 2.5.0, packaged: 2014-10-22 14:19:22 UTC, GitRev: e52c66f42e81)
##
## TRANSLATING MODEL 'lengthHeterogeneous' FROM Stan CODE TO C++ CODE NOW.
## COMPILING THE C++ CODE FOR MODEL 'lengthHeterogeneous' NOW.
## In file included from file1c376ce59b92.cpp:421:
## In file included from /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rstan/include/rstan/rstaninc.hpp:3:
## /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rstan/include/rstan/stan_fit.hpp:732:15: warning: unused variable 'return_code' [-Wunused-variable]
## int return_code = stan::common::do_bfgs_optimize(model, lbfgs, base_rng,
## ^
## /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rstan/include/rstan/stan_fit.hpp:796:15: warning: unused variable 'return_code' [-Wunused-variable]
## int return_code = stan::common::do_bfgs_optimize(model, bfgs, base_rng,
## ^
## /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rstan/include/rstan/stan_fit.hpp:616:14: warning: unused variable 'init_log_prob' [-Wunused-variable]
## double init_log_prob;
## ^
## In file included from file1c376ce59b92.cpp:8:
## In file included from /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rstan/include//stansrc/stan/model/model_header.hpp:16:
## In file included from /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rstan/include//stansrc/stan/agrad/rev.hpp:5:
## /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rstan/include//stansrc/stan/agrad/rev/chainable.hpp:87:17: warning: 'static' function 'set_zero_all_adjoints' declared in header file should be declared 'static inline' [-Wunneeded-internal-declaration]
## static void set_zero_all_adjoints() {
## ^
## In file included from file1c376ce59b92.cpp:8:
## In file included from /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rstan/include//stansrc/stan/model/model_header.hpp:23:
## /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rstan/include//stansrc/stan/io/dump.hpp:26:14: warning: function 'product' is not needed and will not be emitted [-Wunneeded-internal-declaration]
## size_t product(std::vector<size_t> dims) {
## ^
## 5 warnings generated.
## the number of chains is less than 1; sampling not done
m
## Inference for Stan model: lengthHeterogeneous.
## 4 chains, each with iter=5000; warmup=2500; thin=1;
## post-warmup draws per chain=2500, total post-warmup draws=10000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5%
## beta[1] 15.38 0.00 0.13 15.12 15.29 15.38 15.46 15.63
## beta[2] 0.10 0.00 0.08 -0.05 0.05 0.10 0.15 0.25
## beta[3] 1.38 0.00 0.04 1.31 1.36 1.38 1.41 1.46
## beta[4] 1.91 0.00 0.05 1.82 1.88 1.92 1.95 2.01
## beta[5] -8.61 0.00 0.11 -8.82 -8.68 -8.61 -8.53 -8.39
## beta[6] -1.61 0.00 0.12 -1.84 -1.69 -1.61 -1.53 -1.38
## beta[7] -4.82 0.00 0.12 -5.05 -4.91 -4.82 -4.74 -4.59
## s[1] 1.96 0.00 0.23 1.57 1.79 1.94 2.10 2.46
## s[2] 2.85 0.00 0.22 2.45 2.69 2.83 2.99 3.32
## s[3] 1.98 0.00 0.11 1.78 1.90 1.98 2.05 2.21
## s[4] 1.33 0.00 0.04 1.26 1.31 1.33 1.36 1.40
## s[5] 2.53 0.00 0.29 2.03 2.32 2.51 2.71 3.18
## s[6] 1.65 0.00 0.11 1.46 1.58 1.65 1.72 1.88
## s[7] 1.94 0.00 0.15 1.67 1.83 1.93 2.04 2.27
## s[8] 0.83 0.00 0.06 0.72 0.79 0.83 0.87 0.96
## s[9] 1.47 0.00 0.18 1.18 1.35 1.46 1.58 1.87
## s[10] 2.02 0.00 0.12 1.79 1.93 2.01 2.10 2.27
## s[11] 2.05 0.00 0.16 1.77 1.94 2.04 2.15 2.39
## s[12] 0.97 0.00 0.05 0.87 0.93 0.97 1.00 1.07
## s[13] 2.53 0.00 0.13 2.28 2.43 2.52 2.61 2.79
## s[14] 3.10 0.00 0.13 2.87 3.02 3.10 3.18 3.36
## s[15] 1.89 0.00 0.07 1.77 1.84 1.89 1.93 2.02
## s[16] 1.31 0.00 0.02 1.28 1.30 1.31 1.32 1.35
## s[17] 2.50 0.00 0.12 2.28 2.42 2.50 2.58 2.74
## s[18] 1.78 0.00 0.05 1.68 1.74 1.78 1.81 1.88
## s[19] 1.79 0.00 0.06 1.67 1.75 1.79 1.83 1.92
## s[20] 0.78 0.00 0.04 0.71 0.75 0.78 0.80 0.86
## s[21] 2.28 0.00 0.15 2.00 2.17 2.27 2.37 2.60
## s[22] 1.96 0.00 0.07 1.83 1.91 1.95 2.00 2.10
## s[23] 1.93 0.00 0.09 1.77 1.87 1.93 1.99 2.12
## s[24] 1.06 0.00 0.03 1.01 1.04 1.06 1.08 1.12
## lp__ -8467.68 0.06 3.94 -8476.34 -8470.17 -8467.34 -8464.88 -8461.00
## n_eff Rhat
## beta[1] 4635 1
## beta[2] 10000 1
## beta[3] 10000 1
## beta[4] 8347 1
## beta[5] 4344 1
## beta[6] 4523 1
## beta[7] 4799 1
## s[1] 10000 1
## s[2] 10000 1
## s[3] 10000 1
## s[4] 10000 1
## s[5] 10000 1
## s[6] 10000 1
## s[7] 10000 1
## s[8] 10000 1
## s[9] 10000 1
## s[10] 10000 1
## s[11] 10000 1
## s[12] 10000 1
## s[13] 10000 1
## s[14] 10000 1
## s[15] 10000 1
## s[16] 10000 1
## s[17] 10000 1
## s[18] 10000 1
## s[19] 10000 1
## s[20] 10000 1
## s[21] 10000 1
## s[22] 10000 1
## s[23] 10000 1
## s[24] 10000 1
## lp__ 4508 1
##
## Samples were drawn using NUTS(diag_e) at Fri Sep 4 12:35:27 2015.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
rstan::traceplot(m, inc_warmup = FALSE)
e <- rstan::extract(m, pars = c("beta", "s"))
par(mfrow = c(3, 3))
for(i in 1:2) {
if(i == 1) {
for(j in 1:dim(e[[i]])[2]) {
plot(density(e[[i]][,j]), main = paste("beta", j, sep=""))
}
} else if(i == 2) {
plot(density(e[[i]]), main = "s")
}
}
library(ggmcmc)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:nlme':
##
## collapse
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Loading required package: tidyr
##
## Attaching package: 'tidyr'
##
## The following object is masked from 'package:rstan':
##
## extract
library(coda)
##
## Attaching package: 'coda'
##
## The following object is masked from 'package:rstan':
##
## traceplot
s <- mcmc.list(lapply(1:ncol(m), function(x) mcmc(as.array(m)[,x,])))
S <- ggs(s)
ggs_caterpillar(S, family = "beta")
library(Gmisc)
## Loading required package: htmlTable
tableData <- cbind(summary(s)$statistics[1:(dim(e[[1]])[2] + 1), 1],
summary(s)$quantiles[1:(dim(e[[1]])[2] + 1), c(1, 5)])
htmlTable(x=round(tableData, digits=2),
caption=paste("Regression parameter estimates and CI's"),
header = c("Estimate", "Lower 95% CI", "Upper 95% CI"),
rowlabel = "Variables",
rnames = c("Intercept",
"Net Size",
"August",
"September",
"Juvenile",
"Male",
"Unknown",
"standard deviation"))
| Regression parameter estimates and CI’s | |||
| Variables | Estimate | Lower 95% CI | Upper 95% CI |
|---|---|---|---|
| Intercept | 15.38 | 15.12 | 15.63 |
| Net Size | 0.1 | -0.05 | 0.25 |
| August | 1.38 | 1.31 | 1.46 |
| September | 1.91 | 1.82 | 2.01 |
| Juvenile | -8.61 | -8.82 | -8.39 |
| Male | -1.61 | -1.84 | -1.38 |
| Unknown | -4.82 | -5.05 | -4.59 |
| standard deviation | 1.96 | 1.57 | 2.46 |
summary(lm(y ~ net + date + gender))
##
## Call:
## lm(formula = y ~ net + date + gender)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.1201 -1.1032 -0.0632 1.0397 10.9368
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.73781 0.10216 154.049 < 2e-16 ***
## net 0.22246 0.08634 2.577 0.00999 **
## date2 0.97987 0.04592 21.336 < 2e-16 ***
## date3 0.90260 0.04660 19.368 < 2e-16 ***
## genderJ -8.97707 0.06594 -136.130 < 2e-16 ***
## genderM -1.63806 0.07241 -22.621 < 2e-16 ***
## genderU -4.38327 0.07085 -61.870 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.647 on 9120 degrees of freedom
## Multiple R-squared: 0.8267, Adjusted R-squared: 0.8266
## F-statistic: 7252 on 6 and 9120 DF, p-value: < 2.2e-16
plot(lm(y ~ net + date + gender))
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
summary(rlm(y ~ net + date + gender))
##
## Call: rlm(formula = y ~ net + date + gender)
## Residuals:
## Min 1Q Median 3Q Max
## -15.2080 -1.0124 -0.0275 1.0325 11.0125
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 15.7354 0.0950 165.5745
## net 0.1551 0.0803 1.9309
## date2 1.1375 0.0427 26.6261
## date3 1.1334 0.0434 26.1438
## genderJ -8.9830 0.0613 -146.4337
## genderM -1.5479 0.0674 -22.9787
## genderU -4.7710 0.0659 -72.3921
##
## Residual standard error: 1.516 on 9120 degrees of freedom
plot(rlm(y ~ net + date + gender))